* [] brackets indicate redacted internal census variable name or directory that can't be disclosed. ;

* define libraries;
libname icf '[directory location for geocoded worker residences]';
libname hannah  '/projects/users/########';
libname ben '/projects/users/########';

* get county centroids;
* projections are slightly different so use two sources just in case;
data work.counties;
   set maps.county;
   where state in (01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56);  /* FIPS codes for our states */
   format state z2. county z3.;
   state2 = put(state,z2. -L);
   county2 = put(county,z3. -L);
   ctyfips = cats(state2,county2);
run;
%CENTROID(work.counties,work.countycenters,ctyfips);
data work.countycenters;
set work.countycenters;
lat = y*45/atan(1);
long = -x*45/atan(1);
y1 = lat;
x1 = long;
drop lat long x y;
run;

data test;
  set mapsgfk.us_counties (where=(state in (01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56)));
  x = long;
  y = lat;
run;
%CENTROID(work.test,work.countycenters2,ID);
data work.countycenters2;
set work.countycenters2;
ctyfips = substr(ID,4,5);
run;

data work.countycenters;
  merge work.countycenters work.countycenters2 (keep = x y ctyfips);
  by ctyfips;
run;



* collapse wind plant data to county-annual;
proc sort data=ben.plants;
by t_fips p_year;
run;

proc import out = ben.plants_csm 
  datafile = "/projects/users/########/PlantLevelWindDatabaseCSM.dta"
  dbms=dta
  replace;
run;

data ben.plants_csm;
  set ben.plants_csm;
  stfips = substr(t_fips,1,2);
run;

data ben.plants_csm_23;
  set ben.plants_csm;
  where stfips = "01" or stfips = "04" or stfips = "05" or stfips = "06"
      or stfips = "08" or stfips = "10" or stfips = "11" or stfips = "17"
      or stfips = "18" or stfips = "19" or stfips = "20" or stfips = "23"
      or stfips = "24" or stfips = "29" or stfips = "30" or stfips = "31"
      or stfips = "32" or stfips = "35" or stfips = "38" or stfips = "40"
      or stfips = "47" or stfips = "51" or stfips = "56";
run;

/* same result;
data ben.plants_csm_23s;
  set ben.plants_csm;
  where t_state = "AL" or t_state = "AZ" or t_state = "AR" or t_state = "CA"
      or t_state = "CO" or t_state = "DE" or t_state = "DC" or t_state = "IL"
      or t_state = "IN" or t_state = "IA" or t_state = "KS" or t_state = "ME"
      or t_state = "MD" or t_state = "MO" or t_state = "MT" or t_state = "NE"
      or t_state = "NV" or t_state = "NM" or t_state = "ND" or t_state = "OK"
      or t_state = "TN" or t_state = "VA" or t_state = "WY";
run;
*/

proc sql;
  create table hannah.countywind as
  select t_fips, p_year,
  sum(p_tnum) as new_p_tnum,
  sum(p_cap) as new_p_cap,
  max(t_state) as t_state
  from ben.plants_csm
  group by t_fips, p_year
  order by t_fips, p_year;
quit;

